########################################################################
#  Searx-Qt - Lightweight desktop application for Searx.
#  Copyright (C) 2020-2024  CYBERDEViL
#
#  This file is part of Searx-Qt.
#
#  Searx-Qt is free software: you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation, either version 3 of the License, or
#  (at your option) any later version.
#
#  Searx-Qt is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with this program.  If not, see <https://www.gnu.org/licenses/>.
#
########################################################################

from threading import Thread, Lock
import pycurl
import random
import json
import time
from jsonschema import validate as JsonValidate
from jsonschema.exceptions import ValidationError, SchemaError
from urllib.parse import urlparse

from searxqt.core import log

## @see https://github.com/pycurl/pycurl/blob/master/examples/retriever-multi.py
# We should ignore SIGPIPE when using pycurl.NOSIGNAL - see
# the libcurl tutorial for more info.
try:
    import signal
    from signal import SIGPIPE, SIG_IGN
except ImportError:
    # `signal`/`SIGPIPE` could not be imported here; there is nothing to
    # ignore in that case, so carry on without installing a handler.
    pass
else:
    # Ignore SIGPIPE for the whole process. The cURL handles created further
    # down in this module are configured with pycurl.NOSIGNAL.
    signal.signal(SIGPIPE, SIG_IGN)

# True when the optional `socks` module can be imported. The flag decides
# whether the SOCKS entries are added to `ProxyTypes` further down.
# NOTE(review): the proxy itself is configured through pycurl options in this
# module; confirm whether the `socks` import is still a real requirement.
HAVE_SOCKS = False
try:
    import socks
    HAVE_SOCKS = True
    # Only the import probe matters; don't keep the module name around.
    del socks
except ImportError:
    log.debug("pysocks not installed! No socks proxy support.")

# Work out the CA bundle path that is passed to every cURL handle through
# pycurl.CAINFO (see HttpThread.__init__).
# NOTE(review): this runs at import time and raises when the ssl module
# reports no default `capath`; it also assumes the bundle is a file called
# "ca-certificates.crt" inside that directory - confirm for all target
# platforms.
import ssl
import os
capath = ssl.get_default_verify_paths().capath
if not capath:
    raise Exception("Could not find ca path TODO")
CA_CRT_PATH = os.path.join(capath, "ca-certificates.crt")
# Drop the helper names again so they do not leak into the module namespace.
del ssl
del os
del capath

from .schema import Schemas


## Our error type definitions.
class ErrorType:
    """! Integer codes describing the outcome of a request.

    `Success` is 0 (and therefore falsy); every other member is a distinct
    non-zero integer, numbered in the order listed below.
    """
    (
        Success,
        HttpError,
        ConnectionError,
        Timeout,
        WrongStatus,
        DecodeError,
        NoResults,
        ProxyError,
        SSLError,
        InvalidSchema,
        ContentSizeExceeded,
        CorruptImage,
        Cancelled,
        Other,
    ) = range(14)


## cURL error map to our own `ErrorType`.
## @see https://curl.se/libcurl/c/libcurl-errors.html
# Keys are cURL error numbers as reported per failed transfer, values are
# members of `ErrorType`. Error numbers that are not listed here are reported
# as `ErrorType.Other` by HttpThread.
CurlErrorMap = {
    pycurl.E_HTTP_RETURNED_ERROR     : ErrorType.HttpError,
    pycurl.E_COULDNT_CONNECT         : ErrorType.ConnectionError,
    pycurl.E_COULDNT_RESOLVE_HOST    : ErrorType.ConnectionError,
    pycurl.E_OPERATION_TIMEDOUT      : ErrorType.Timeout,
    pycurl.E_BAD_CONTENT_ENCODING    : ErrorType.DecodeError,
    # Written as a bare number instead of a pycurl constant.
    # NOTE(review): presumably pycurl does not export it - confirm.
    97                               : ErrorType.ProxyError,  # CURLE_PROXY
    pycurl.E_COULDNT_RESOLVE_PROXY   : ErrorType.ProxyError,
    pycurl.E_SSL_CONNECT_ERROR       : ErrorType.SSLError,
    pycurl.E_PEER_FAILED_VERIFICATION: ErrorType.SSLError,
    pycurl.E_SSL_SHUTDOWN_FAILED     : ErrorType.SSLError,
    pycurl.E_SSL_INVALIDCERTSTATUS   : ErrorType.SSLError,
    pycurl.E_FILESIZE_EXCEEDED       : ErrorType.ContentSizeExceeded
}


## `ErrorType` -> `str` map.
## Use this to get a string of the error name.
# Built from the member names so that every key is the `ErrorType` value and
# every value is the spelling of that member's name.
ErrorTypeStr = {
    getattr(ErrorType, name): name
    for name in (
        "Success",
        "HttpError",
        "ConnectionError",
        "Timeout",
        "WrongStatus",
        "DecodeError",
        "NoResults",
        "ProxyError",
        "SSLError",
        "InvalidSchema",
        "ContentSizeExceeded",
        "CorruptImage",
        "Cancelled",
        "Other",
    )
}


class ProxyProtocol:
    """! Numeric identifiers for the supported proxy protocols."""
    HTTP, SOCKS4, SOCKS5 = 1, 2, 4


# Lower-case protocol name per numeric identifier. Keys 1, 2 and 4 equal the
# `ProxyProtocol` members HTTP, SOCKS4 and SOCKS5; key 0 has no
# `ProxyProtocol` member and maps to "none".
ProxyProtocolString = {
     0: "none",
     1: "http",
     2: "socks4",
     4: "socks5"
}


# Display label -> value that is handed to pycurl.PROXYTYPE when a proxy is
# enabled for a request (see HttpRequestSettings.proxyType).
ProxyTypes = {
    ## @see https://github.com/pycurl/pycurl/blob/master/src/module.c
    "HTTP 1.1": pycurl.PROXYTYPE_HTTP,
    "HTTP 1.0": pycurl.PROXYTYPE_HTTP_1_0,

    # These are not included in pycurl, see https://github.com/pycurl/pycurl/issues/622
    # CURLPROXY_HTTPS = 2,  /* HTTPS but stick to HTTP/1 added in 7.52.0 */
    # CURLPROXY_HTTPS2 = 3, /* HTTPS and attempt HTTP/2 added in 8.2.0 */
    "HTTPS": 2,
    "HTTPS2": 3,
}


# The SOCKS variants are only offered when the `socks` import probe at the
# top of this module succeeded.
if HAVE_SOCKS:
    ProxyTypes.update({
        "Socks 4": pycurl.PROXYTYPE_SOCKS4,
        "Socks 4a (with DNS)": pycurl.PROXYTYPE_SOCKS4A,
        "Socks 5": pycurl.PROXYTYPE_SOCKS5,
        "Socks 5h (with DNS)": pycurl.PROXYTYPE_SOCKS5_HOSTNAME
    })


class HttpNewRequestSettings:
    """! Read-only request settings

    Thin attribute-style view on a settings dict: `settings.timeout` returns
    `data["timeout"]`. The class offers no way to modify the settings.
    """
    def __init__(self, data):
        """
        @param data A dict object that has no references to other objects
        """
        self._data = data

    def __str__(self):
        """! Return the string form of the wrapped dict."""
        return str(self._data)

    def __getattr__(self, key):
        """! Look up a setting by attribute name.

        Only called when normal attribute lookup fails.

        @param key Name of the setting.
        @return The value stored for `key`.
        @raise AttributeError When there is no such setting.
        """
        try:
            # Go through __dict__ so a missing `_data` (instance created
            # without __init__, e.g. by copy or pickle) cannot recurse back
            # into __getattr__.
            return self.__dict__["_data"][key]
        except KeyError:
            # Attribute access must fail with AttributeError, otherwise
            # hasattr() and getattr(obj, name, default) do not work.
            raise AttributeError(
                f"{type(self).__name__!r} object has no setting {key!r}"
            ) from None


class HttpRequestSettings:
    """! Request settings

    Mutable settings that are shared by all requests. Call
    newRequestSettings() to get a detached snapshot for a single request.
    """

    def __init__(self):
        self._verifySsl = True
        self._timeout = 10
        self._recvLimit = 5 * 1024 * 1024
        self._chunkLimit = 16 * 1024
        self._useragents = []
        self._randomUseragent = False
        self._extraHeaders = [
            "Accept: text/html;q=0.8",
            "Accept-Encoding: gzip, deflate",
            "Accept-Language: en-US,en;q=0.5",
            "Connection: keep-alive",
            "DNT: 1",
            "Upgrade-Insecure-Requests: 1"
        ]
        self._proxyEnabled = False

        ## "netloc:port" or None
        self._proxyAddress = None

        ## "username:password" or None
        self._proxyAuth = None

        ## @see https://github.com/pycurl/pycurl/blob/master/src/module.c
        ## pycurl.PROXYTYPE_HTTP
        ## pycurl.PROXYTYPE_HTTP_1_0
        ## pycurl.PROXYTYPE_SOCKS4
        ## pycurl.PROXYTYPE_SOCKS4A
        ## pycurl.PROXYTYPE_SOCKS5
        ## pycurl.PROXYTYPE_SOCKS5_HOSTNAME
        self._proxyType = pycurl.PROXYTYPE_HTTP

    def getData(self):
        """! Return all settings as a dict.

        @note The "useragents" and "extraHeaders" values are the live
              internal lists, not copies.
        """
        return {
            "verifySsl": self.verifySsl,
            "timeout": self.timeout,
            "recvLimit": self.recvLimit,
            "chunkLimit": self.chunkLimit,
            "useragents": self.useragents,
            "randomUseragent": self.randomUseragent,
            "extraHeaders": self._extraHeaders,
            "proxyEnabled": self.proxyEnabled,
            "proxyAddress": self.proxyAddress,
            "proxyAuth": self.proxyAuth,
            "proxyType": self.proxyType
        }

    def setData(self, data):
        """! Load settings from a dict as returned by getData().

        Scalar settings that are missing from `data` keep their current
        value. The two lists are replaced in place; a missing "useragents" or
        "extraHeaders" key empties the respective list.

        @param data Dict with (a subset of) the keys returned by getData().
        """
        self.verifySsl = data.get("verifySsl", self.verifySsl)
        self.timeout = data.get("timeout", self.timeout)
        self.recvLimit = data.get("recvLimit", self.recvLimit)
        self.chunkLimit = data.get("chunkLimit", self.chunkLimit)
        self.randomUseragent = data.get("randomUseragent", self.randomUseragent)
        self.proxyEnabled = data.get("proxyEnabled", self.proxyEnabled)
        self.proxyAddress = data.get("proxyAddress", self.proxyAddress)
        self.proxyAuth = data.get("proxyAuth", self.proxyAuth)
        self.proxyType = data.get("proxyType", self.proxyType)

        # Snapshot the incoming lists BEFORE touching our own: getData()
        # returns the live lists, so `data` may well hold the very objects
        # that are about to be overwritten (e.g. setData(getData())).
        useragents = list(data.get("useragents", []))
        extraHeaders = list(data.get("extraHeaders", []))

        # Slice assignment keeps the list objects themselves intact for
        # anyone holding a reference to them.
        self._useragents[:] = useragents
        self._extraHeaders[:] = extraHeaders

    @property
    def verifySsl(self):
        """! Whether the peer certificate and host name get verified."""
        return self._verifySsl

    @verifySsl.setter
    def verifySsl(self, state):
        self._verifySsl = state

    @property
    def timeout(self):
        """! Timeout; used for both the connect and the total timeout."""
        return self._timeout

    @timeout.setter
    def timeout(self, seconds):
        self._timeout = seconds

    @property
    def recvLimit(self):
        """! Maximum amount of response content to accept."""
        return self._recvLimit

    @recvLimit.setter
    def recvLimit(self, limit):
        self._recvLimit = limit

    @property
    def chunkLimit(self):
        """! Chunk limit; stored and handed out with the other settings."""
        return self._chunkLimit

    @chunkLimit.setter
    def chunkLimit(self, limit):
        self._chunkLimit = limit

    @property
    def useragents(self):
        """! Live list with User-Agent strings."""
        return self._useragents

    @property
    def randomUseragent(self):
        """! Whether getUseragent() picks a random entry."""
        return self._randomUseragent

    @randomUseragent.setter
    def randomUseragent(self, state):
        self._randomUseragent = state

    @property
    def extraHeaders(self):
        """! Live list with extra header lines ("Name: value")."""
        return self._extraHeaders

    @property
    def proxyEnabled(self):
        """! Whether requests should go through the configured proxy."""
        return self._proxyEnabled

    @proxyEnabled.setter
    def proxyEnabled(self, state):
        self._proxyEnabled = state

    @property
    def proxyAddress(self):
        """! "netloc:port" or None"""
        return self._proxyAddress

    @proxyAddress.setter
    def proxyAddress(self, address):
        self._proxyAddress = address

    @property
    def proxyAuth(self):
        """! "username:password" or None"""
        return self._proxyAuth

    @proxyAuth.setter
    def proxyAuth(self, auth):
        self._proxyAuth = auth

    @property
    def proxyType(self):
        """! Proxy type value as passed to pycurl.PROXYTYPE."""
        return self._proxyType

    @proxyType.setter
    def proxyType(self, protocol):
        self._proxyType = protocol

    def getUseragent(self):
        """! Pick the User-Agent string for a new request.

        @return None when no useragents are configured, a random entry when
                `randomUseragent` is set and there is more than one entry,
                else the first entry.
        """
        if not self.useragents:
            return None

        # Return first useragent string
        if len(self.useragents) == 1 or not self.randomUseragent:
            return self.useragents[0]

        # Return random useragent
        return random.choice(self.useragents)

    def newRequestSettings(self):
        """! Returns a <HttpNewRequestSettings> object that has no references,
        so it can be safely used inside another thread.
        """
        data = {
            "verifySsl": self.verifySsl,
            "timeout": self.timeout,
            "recvLimit": self.recvLimit,
            "chunkLimit": self.chunkLimit,
            # A copy, so later edits of our list don't reach the snapshot.
            "header": self._extraHeaders[:],
            # A User-Agent string or None
            "useragent": self.getUseragent(),
            "proxyEnabled": self.proxyEnabled,
            "proxyType": self.proxyType,
            "proxyAddress": self.proxyAddress,
            "proxyAuth": self.proxyAuth
        }

        return HttpNewRequestSettings(data)


class HttpRequest:
    """! HTTP request

    Holds the URL, the per-request settings and the optional POST data of a
    single request.
    """
    def __init__(self, url, settings, data=None):
        """
        @param url      The URL to request.
        @param settings Read-only request settings, <HttpNewRequestSettings>
        @param data     Dict
        """
        self._url = url
        self._parsedUrl = urlparse(url)
        self._settings = settings
        self._data = data

    @property
    def url(self):
        """! The URL as passed to the constructor."""
        return self._url

    @property
    def parsedUrl(self):
        """! Result of urlparse() on the URL."""
        return self._parsedUrl

    @property
    def settings(self):
        """! The settings object as passed to the constructor."""
        return self._settings

    @property
    def data(self):
        """! The data dict, or None."""
        return self._data

    def dataString(self):
        """! Join the data dict into a "key=value&key=value" string.

        @return The joined string; an empty string when there is no data.

        @note Keys and values are inserted as-is, nothing is URL-encoded.
              NOTE(review): confirm that callers pass pre-encoded values,
              otherwise characters such as '&' or '=' corrupt the body.
        """
        if not self._data:
            return ""
        return "&".join([f"{k}={v}" for k, v in self._data.items()])


class HttpReponse:
    """! HTTP response base class

    Collects the body of a transfer through writeContent() and records the
    status code and error state of the request it belongs to.
    """

    def __init__(self, request, callback):
        """
        @param request  <HttpRequest> object
        @param callback Callback function that will be called when the request
                        has finished and the response is available. The
                        function/method should accept this object as first
                        argument.
        """
        self._request = request
        self._callback = callback
        self._header = ""
        self._content = b''
        self._text = ""
        self._error = ErrorType.Success
        self._errorMsg = None
        self._statusCode = 0

        ## Amount of content accepted by writeContent() so far.
        self._recvLen = 0

    @property
    def request(self):
        """! The <HttpRequest> this response belongs to."""
        return self._request

    @property
    def callback(self):
        """! The callback as passed to the constructor."""
        return self._callback

    @property
    def content(self):
        """! Raw response body (bytes)."""
        return self._content

    @property
    def text(self):
        """! Decoded response body; empty until verifyContent() succeeded."""
        return self._text

    @property
    def statusCode(self):
        """! Status code as set by setStatusCode(); 0 until then."""
        return self._statusCode

    @property
    def error(self):
        """! Current `ErrorType` value."""
        return self._error

    @property
    def errorMessage(self):
        """! Message that was passed along with the last error, or None."""
        return self._errorMsg

    def cancel(self):
        """! Mark this response as cancelled."""
        self.setError(ErrorType.Cancelled, "Cancelled")

    def isCancelled(self):
        """! True when the current error is `ErrorType.Cancelled`."""
        return self.error == ErrorType.Cancelled

    def writeContent(self, data):
        """! Append a chunk of body data.

        @param data Chunk of received data (bytes).
        @return The chunk length when it was accepted; 0 when it was rejected
                because an error (e.g. Cancelled) is already set or because
                accepting it would exceed the receive limit.
        """
        if self.error != ErrorType.Success:
            # This request got cancelled (or failed otherwise)
            return 0
        dataLen = len(data)
        received = self._recvLen + dataLen
        if received > self.request.settings.recvLimit:
            self.setError(ErrorType.ContentSizeExceeded, "Max content size exceeded")
            return 0
        self._content += data
        # Keep the running total, so the limit above is enforced on the
        # whole body and not just on each individual chunk.
        self._recvLen = received
        return dataLen

    def writeHeader(self, data):
        """! Append a chunk of header data.

        @param data Header data, either `str` or `bytes`. Bytes are decoded
                    as iso-8859-1, which cannot fail for any byte value.
        """
        if isinstance(data, bytes):
            data = data.decode("iso-8859-1")
        self._header += data

    def setError(self, error, message):
        """! Set the error state.

        @param error   `ErrorType` value.
        @param message Message describing the error.
        """
        self._errorMsg = message
        self._error = error

    def setStatusCode(self, statusCode):
        """! Store the status code.

        Any status other than 200 also sets the error to
        `ErrorType.WrongStatus` (with an empty message).
        """
        self._statusCode = statusCode

        if statusCode != 200:
            self.setError(ErrorType.WrongStatus, "")

    def verifyContent(self, httpThread):
        """! Decode the received content as UTF-8 into `text`.

        Sets `ErrorType.DecodeError` when the content is not valid UTF-8.

        @param httpThread Not used by this implementation.
        """
        try:
            text = self.content.decode("utf-8")
        except UnicodeDecodeError as err:
            self.setError(ErrorType.DecodeError, str(err))
        else:
            self._text = text


class HttpJsonReponse(HttpReponse):
    """! Base class for responses that carry a JSON document."""

    ## JSON Schema to validate the response. Subclasses should set this.
    Schema = {}

    def __init__(self, request, callback):
        HttpReponse.__init__(self, request, callback)

        ## Parsed document; stays an empty dict until parsing succeeded.
        self._json = {}

    def verifySchema(self):
        """! Validate the parsed document against `Schema`.

        Sets `ErrorType.InvalidSchema` when either the document or the schema
        itself is rejected.
        """
        try:
            JsonValidate(instance=self._json, schema=self.Schema)
        except (ValidationError, SchemaError) as err:
            self.setError(ErrorType.InvalidSchema, f"InvalidSchema: `{err}`")

    def verifyContent(self, httpThread):
        """! Decode the content, parse it as JSON and validate the result.

        Stops at the first step that fails; that step sets the error.
        """
        HttpReponse.verifyContent(self, httpThread)
        if self.error == ErrorType.Success:
            try:
                parsed = json.loads(self.text)
            except json.JSONDecodeError as err:
                self.setError(ErrorType.DecodeError, f"DecodeError: `{err}`")
            else:
                self._json = parsed
                self.verifySchema()

    def json(self):
        """! Return the parsed document."""
        return self._json


class InstancesJsonReponse(HttpJsonReponse):
    """! JSON response that is validated against the
    "searx_space_instances" schema.
    """
    Schema = Schemas["searx_space_instances"]


class HttpImageReponse(HttpReponse):
    """! Response whose content is left as raw bytes."""

    def verifyContent(self, httpThread):
        """! Deliberately does nothing: the content is not decoded, so
        `text` stays empty and no decode error can be set.
        """
        return None


class SearxngHtmlReponse(HttpReponse):
    """! Response whose content is left undecoded and whose callback is
    withheld once the response got cancelled.
    """

    @property
    def callback(self):
        """! The callback, or None when this response is cancelled."""
        # A link-token check used to sit here as well; it is disabled:
        #if self._linktoken:
        #   return None
        return None if self.error == ErrorType.Cancelled else self._callback

    def verifyContent(self, httpThread):
        """! Deliberately does nothing; the content is not decoded here."""
        return None


class HttpThread(Thread):
    """! Worker thread that runs queued requests through one
    <pycurl.CurlMulti> object.

    Requests are added with get(). Finished responses are parked in an
    internal queue until processCallbacks() is called, which invokes the
    response callbacks in the calling thread.
    """

    def __init__(self):
        Thread.__init__(self)
        self._curlMulti = pycurl.CurlMulti()

        ## list with <pycurl.Curl> objects
        self._curlMulti.handles = []

        ## list with <pycurl.Curl> objects, handles that are idle.
        self._freeHandles = []

        ## Maximum concurrent connections (not host dependent)
        self._maxConn = 10

        ## list with <HttpReponse> objects
        self._queue = []

        ## list with completed <HttpReponse> objects waiting to be
        ## processed by the main thread (on processCallbacks() call).
        self._respQueue = []

        ## Loop breaks when set to True
        self._exit = False

        ## This will be set on cancelAll()
        self._cancelled = False

        ## Limiting concurrent connections to the same host
        self._maxHost = 2
        self._curHosts = {}  # {netloc: open-connection-count}

        # Locks
        self._queueLock = Lock()     # Adding new requests
        self._respQueueLock = Lock() # Getting done requests
        self._exitLock = Lock()
        self._cancelLock = Lock()

        # Init `self._maxConn` amount of handles
        for _ in range(self._maxConn):
            handle = pycurl.Curl()
            handle.fp = None
            handle.setopt(pycurl.FOLLOWLOCATION, 1)
            handle.setopt(pycurl.MAXREDIRS, 2)
            handle.setopt(pycurl.NOSIGNAL, 1)
            #handle.setopt(pycurl.CAINFO, "/etc/ssl/certs/ca-certificates.crt") #  make settable
            handle.setopt(pycurl.CAINFO, CA_CRT_PATH)
            self._curlMulti.handles.append(handle)

        # Copy all handles to _freeHandles, since all are idle.
        self._freeHandles = self._curlMulti.handles[:]

    def __subtractOpenHost(self, response):
        """! This is called when a response is ready, it will subtract one
        from the open connection counter of the requested host.
        """
        netloc = response.request.parsedUrl.netloc
        self._curHosts[netloc] -= 1
        if self._curHosts[netloc] == 0:
            del self._curHosts[netloc]

    def _takeQueued(self, doLater):
        """! Pair queued responses with idle handles.

        @param doLater List that receives the responses that have to wait
                       because their host is already at `self._maxHost` open
                       connections.
        @return List with (response, handle) tuples to start now.
        """
        taken = []
        with self._queueLock:
            while self._queue and self._freeHandles:
                response = self._queue.pop(0)

                # Honor maximum amount of concurrent connections to the same
                # host.
                netloc = response.request.parsedUrl.netloc
                hostCount = self._curHosts.get(netloc)
                if hostCount is None:
                    self._curHosts[netloc] = 1
                elif hostCount == self._maxHost:
                    doLater.append(response)
                    continue
                else:
                    self._curHosts[netloc] += 1

                taken.append((response, self._freeHandles.pop()))
        return taken

    def _setupHandle(self, handle, response):
        """! Apply the request of `response` and its settings to `handle`.

        Handles are reused, so every option that depends on the request is
        (re)set here.
        """
        request = response.request
        settings = request.settings

        # Set the requested URL
        handle.setopt(pycurl.URL, request.url)

        # Set POST data, or switch the handle back to GET: a handle that was
        # last used for a POST would otherwise send that POST body again.
        if request.data:
            handle.setopt(pycurl.POSTFIELDS, request.dataString())
        else:
            handle.setopt(pycurl.HTTPGET, 1)

        # Option: Timeout
        handle.setopt(pycurl.CONNECTTIMEOUT, settings.timeout)
        handle.setopt(pycurl.TIMEOUT, settings.timeout)

        # Option: Verify SSL
        if settings.verifySsl:
            handle.setopt(pycurl.SSL_VERIFYPEER, 1)
            handle.setopt(pycurl.SSL_VERIFYHOST, 2)
        else:
            handle.setopt(pycurl.SSL_VERIFYPEER, 0)
            handle.setopt(pycurl.SSL_VERIFYHOST, 0)

        # Option: Receive limit
        handle.setopt(pycurl.MAXFILESIZE, settings.recvLimit)

        # Option: Proxy
        # @see https://curl.se/libcurl/c/CURLOPT_PROXY.html
        # @see https://curl.se/libcurl/c/CURLOPT_PROXYTYPE.html
        # @see https://curl.se/libcurl/c/CURLOPT_PROXYUSERPWD.html
        if settings.proxyEnabled:
            handle.setopt(pycurl.PROXY, settings.proxyAddress)      # netloc:port
            handle.setopt(pycurl.PROXYUSERPWD, settings.proxyAuth)  # username:password
            handle.setopt(pycurl.PROXYTYPE, settings.proxyType)
        else:
            handle.setopt(pycurl.PROXY, None)
            handle.setopt(pycurl.PROXYUSERPWD, None)

        # User-Agent
        if settings.useragent:
            handle.setopt(pycurl.USERAGENT, settings.useragent)
        else:
            handle.setopt(pycurl.USERAGENT, "")

        # Accept compression
        handle.setopt(pycurl.ACCEPT_ENCODING, "gzip, deflate")

        # Header
        handle.setopt(pycurl.HTTPHEADER, settings.header)

        handle.setopt(pycurl.WRITEFUNCTION, response.writeContent)
        #handle.setopt(pycurl.HEADERFUNCTION, response.writeHeader)

        handle.response = response

    def _finishHandle(self, handle, errno=None, errmsg=None):
        """! Wrap up a completed transfer and give the handle back.

        @param handle <pycurl.Curl> object that finished.
        @param errno  cURL error number, None when the transfer succeeded.
        @param errmsg cURL error message, None when the transfer succeeded.
        """
        response = handle.response

        # Subtract this host from open connections
        self.__subtractOpenHost(response)

        if not response.isCancelled():
            # Remember whether an error (e.g. ContentSizeExceeded) was set
            # during the transfer, BEFORE setStatusCode() may flag a non-200
            # status as WrongStatus.
            hadError = response.error != ErrorType.Success
            response.setStatusCode(handle.getinfo(handle.RESPONSE_CODE))

            if errno is None:
                if response.error == ErrorType.Success:
                    response.verifyContent(self)
            elif not hadError:
                # The transfer itself failed; report that cause instead of
                # the WrongStatus that a status code of e.g. 0 results in.
                response.setError(CurlErrorMap.get(errno, ErrorType.Other),
                                  errmsg)

            # Add the completed task so it can be processed
            # by the main thread.
            with self._respQueueLock:
                self._respQueue.append(response)

        self._curlMulti.remove_handle(handle)
        self._freeHandles.append(handle)

    def exit(self):
        """! Stop the thread loop.

        @note Call .join() on this object to wait till the thread is finished.

        @note This is thread safe and may be called from the main thread, or
              any other thread.
        """
        with self._exitLock:
            self._exit = True

    def cancelAll(self):
        """! Cancel all current requests.

        Response callbacks won't be called on cancelled requests.

        @note This is thread safe and may be called from the main thread, or
              any other thread.
        """
        with self._cancelLock:
            self._cancelled = True

    def run(self):
        """! The actual loop that will handle requests.

        @note Do not call this! Call .start() instead.
        """
        while True:
            # These are on hold because there are too many connections to
            # the host.
            doLater = []

            # Add new jobs from the queue
            for response, handle in self._takeQueued(doLater):
                self._setupHandle(handle, response)
                self._curlMulti.add_handle(handle)

            # Perform
            while True:
                ret, _ = self._curlMulti.perform()
                if ret != pycurl.E_CALL_MULTI_PERFORM:
                    break

            # Handle completed requests
            while True:
                num_q, ok_list, err_list = self._curlMulti.info_read()

                # Success
                for handle in ok_list:
                    self._finishHandle(handle)

                # Error
                for handle, errno, errmsg in err_list:
                    self._finishHandle(handle, errno, errmsg)

                if num_q == 0:
                    break

            # Exit
            with self._exitLock:
                exitRequested = self._exit
            if exitRequested:
                # Wait
                num_q = 1
                while num_q:
                    num_q, ok_list, err_list = self._curlMulti.info_read()
                    time.sleep(0.2)
                break

            # Cancel all current requests
            with self._cancelLock:
                if self._cancelled:
                    # Clear the queue
                    with self._queueLock:
                        self._queue.clear()

                    # These transfers will finish but don't callback because
                    # they are cancelled.
                    for handle in self._curlMulti.handles:
                        if handle in self._freeHandles:
                            continue
                        handle.response.cancel()

                    # Clear doLater
                    doLater.clear()

                    self._cancelled = False

            # Add back requests that are on hold because there are too many
            # open connections to the same host.
            with self._queueLock:
                self._queue.extend(doLater)

            # Wait for activity
            self._curlMulti.select(1.0)
            time.sleep(0.2)

    # Below should be called from main thread

    def get(self, response):
        """! Add a new GET request to the queue.

        @param response <HttpReponse> or derivative.
        """
        request = response.request
        log.debug(f"NEW REQUEST to <{request.url}>")
        log.debug("--------------------------------")
        log.debug(f"data    : {request.data}")
        log.debug(f"settings: {request.settings}")
        log.debug("")

        with self._queueLock:
            self._queue.append(response)

    def processCallbacks(self):
        """! Callback completed requests, this should be run from your
             main thread, not this thread!

        @return The number of responses that were processed.
        """
        with self._respQueueLock:
            responses = self._respQueue[:]
            self._respQueue.clear()

        for response in responses:
            request = response.request
            log.debug(f"RESPONSE status {response.statusCode} for "
                      f"<{request.url}> with content size of "
                      f"{len(response.content)}")
            log.debug(f" - Error: {response.error} {response.errorMessage}")
            if response.callback:
                response.callback(response)

        return len(responses)

